home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power Programmierung
/
Power-Programmierung CD 2 (Tewi)(1994).iso
/
doc
/
mir
/
replace1.c
< prev
next >
Wrap
Text File
|
1992-07-02
|
12KB
|
345 lines
/*
* usage: replace1 input output [ replacement_table ]
*
* REPLACE1 Replaces each byte in an input file with exactly one
* alternative ASCII character. The default replacement table
* built into the program is one that replaces high-bit-set
* graphic characters with reasonable equivalents. An alternate
* table may be used simply by naming it as an argument. This
* high speed 1 for 1 replacement can be used for anything from
* EBCDIC/ASCII conversion to a crude encryption.
*
* input: [1] Any file
* [2] The optional replacement table consists of ASCII lines in
* any order. Each line has two columns separated by white space.
* The first column in each line is the incoming character or a
* range of characters separated by only a hyphen; the second
* column is the replacement. Non-printable characters are shown
* by a backslash followed by two hex digits (for example, \08 for
* backspace). Backslash itself is shown by \5C. Any characters
* not in the replacement table are retained.
*
* output: File same size as input with some/all characters replaced.
*
* writeup: MIR TUTORIAL TWO, topic 6
*
* Written: Douglas Lowry Mar 06 92
* Copyright (C) 1992 Marpex Inc.
*
* The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
* usage and co-ordination of the MIR family of programs to analyze,
* prepare and index databases (small through gigabyte size), and
* how to build integrated retrieval software around the MIR search
* engine. The fifth of the five MIR tutorial series explains how
* to extend indexing capability into leading edge search-related
* technologies. For more information, GO IBMPRO on CompuServe;
* MIR files are in the DBMS library. The same files are on the
* Canada Remote Systems BBS. A diskette copy of the Introduction
* is available by mail ($10 US... check, Visa or Mastercard);
* diskettes with Introduction, Tutorial ONE software and the
* shareware Tutorial ONE text cost $29. Shareware registration
* for a tutorial is also $29.
*
* E-mail...
* Compuserve 71431,1337
* Internet doug.lowry%canrem.com
* UUCP canrem!doug.lowry
* Others: doug.lowry@canrem.uucp
*
* FAX... 416 963-5677
*
* "Snail mail"... Douglas Lowry, Ph.D.
* Marpex Inc.
* 5334 Yonge Street, #1102
* North York, Ontario
* Canada M2N 6M2
*
* Related database consultation and preparation services are
* available through:
* Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
* North York, Ontario Canada M2J 4Z7
* Tel. 416 492-3838 FAX 416 492-3843
*
* This program is free software; you may redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* (file 05LICENS) along with this program; if not, write to the
* Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
* USA.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Endless-loop shorthand; not referenced by any code visible in this file. */
#define repeat for(;;)
/* Number of bytes process() asks fread() for on each pass. */
#define MAX_BYTES 2048
/*
* declarations
*/
/* Boolean type; used for the "bad table line" flag shared by load_table()
   and get_hex(). */
typedef enum _bool
{ FALSE = 0, TRUE = 1 } Bool;
/* Old-style (non-prototype) forward declarations, so argument types are
   not checked at the call sites.  The definitions follow main(). */
void Usage_(), process(), load_table() ;
unsigned char get_hex() ;
/* Returns the program name that Usage_() prints in its banner line. */
char *
Cmdname_()
{
    char *program = "replace1" ;

    return program ;
}
/*
 * MAIN
 *
 * usage: replace1 input output [ replacement_table ]
 *
 * Validates the command line, optionally replaces the built-in
 * translation table with one loaded from argv[3], then copies argv[1]
 * to argv[2] through the table.  Any argument or file-open problem ends
 * in Usage_(), which prints the help text and exits with status 1.
 */
int
main( argc, argv )
int argc;
char **argv;
{
    /* This table strips accents, turns box characters to asterisks, bar
       characters to equals, pointers to o, and all else from hex 7f up
       to blanks.  Entry N is the byte written for input byte N. */
    unsigned char table[256] = {
          0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        ' ','!','"','#','$','%','&', 39,'(',')','*','+',',','-','.','/',
        '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
        '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
        'P','Q','R','S','T','U','V','W','X','Y','Z','[', 92,']','^','_',
        '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
        'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~', 32,
        /* Ç ü é â ä à å ç ê ë è ï î ì Ä Å */
        'c','u','e','a','a','a','a','c','e','e','e','i','i','i','A','A',
        /* É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ */
        'E','a','A','o','o','o','u','u','y','o','u', 32, 32, 32, 32, 32,
        /* á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » */
        'a','i','o','u','n','N','a','o', 32, 32, 32, 32, 32, 32,'o','o',
        /* ░ ▒ ▓ │ ┤ ╡ ╢ ╖ ╕ ╣ ║ ╗ ╝ ╜ ╛ ┐ */
        '=','=','=', 32, 32, 32, 32, 32, 32, 32,'*','*','*', 32, 32, 32,
        /* └ ┴ ┬ ├ ─ ┼ ╞ ╟ ╚ ╔ ╩ ╦ ╠ ═ ╬ ╧ */
         32, 32, 32, 32, 32, 32, 32, 32,'*','*', 32, 32, 32,'*', 32, 32,
        /* ╨ ╤ ╥ ╙ ╘ ╒ ╓ ╫ ╪ ┘ ┌ █ ▄ ▌ ▐ ▀ */
         32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
        /* α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ φ ε ∩ */
         32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
        /* ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■ */
         32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    };
    FILE *fp, *fp_out ;
    char c10 ;              /* first character of the first argument */

    /* usage: replace1 input output [ replacement_table ] */
    if( argc < 3 || argc > 4 )
        Usage_() ;

    /* A leading '-', '/' or '?' is taken as a request for help. */
    c10 = argv[1][0] ;
    if( c10 == '-' || c10 == '/' || c10 == '?' )
        Usage_() ;

    /* Optional third argument: the table file, read in text mode. */
    if( argc == 4 )
    {
        if(( fp = fopen( argv[3], "r" )) == NULL )
        {
            fprintf( stderr, "\nUnable to open file %s.\n", argv[3] );
            Usage_();
        }
        load_table( fp, table ) ;
        fclose( fp ) ;
    }

    /* Data files are opened in binary mode so every byte passes through
       the table untouched by newline translation. */
    if(( fp = fopen( argv[1], "rb" )) == NULL )
    {
        fprintf( stderr, "\nUnable to open file %s.\n", argv[1] );
        Usage_();
    }
    if(( fp_out = fopen( argv[2], "wb" )) == NULL )
    {
        fprintf( stderr, "\nUnable to open file %s.\n", argv[2] );
        Usage_();
    }

    process( fp, fp_out, table ) ;
    fclose( fp );

    /* Output is buffered, so a full disk may only be reported when the
       stream is flushed by fclose(); treat that as a write failure. */
    if( fclose( fp_out ) != 0 )
    {
        fprintf( stderr, "FATAL... Unable to write.\n\n" ) ;
        exit( 1 ) ;
    }
    return( 0 );
}
/*
 * Usage
 *
 * Writes the help text to stderr and ends the program with exit
 * status 1.  This function does not return.
 */
void
Usage_()
{
    /* Help text after the banner, in output order; NULL ends the list. */
    static char *tail[] = {
        " graphic characters with reasonable equivalents. An alternate\n"
        "table may be used simply by naming it as an argument. This\n"
        "high speed 1 for 1 replacement can be used for anything from\n"
        "EBCDIC/ASCII conversion to a crude encryption.\n"
        "input: [1] Any file\n",

        " [2] The optional replacement table consists of ASCII lines in\n"
        "any order. Each line has two columns separated by white space.\n"
        "The first column in each line is the incoming character or a\n"
        "range of characters separated by only a hyphen; the second\n",

        " column is the replacement. Non-printable characters are shown\n"
        "by a backslash followed by two hex digits (for example, \\08 for\n"
        "backspace). Backslash itself is shown by \\5C. Any characters\n"
        "not in the replacement table are retained.\n"
        "output: File same size as input with some/all characters replaced.\n",

        "writeup: MIR TUTORIAL TWO, topic 6\n\n",

        NULL
    };
    int k ;

    /* Only the banner needs formatting (the program name). */
    fprintf( stderr,
        "Usage: %s input output [ replacement_table ]\n\n"
        "Replaces each byte in an input file with exactly one\n"
        "alternative ASCII character. The default replacement table\n"
        "built into the program is one that replaces high-bit-set\n",
        Cmdname_() );

    for( k = 0 ; tail[ k ] != NULL ; k++ )
        fputs( tail[ k ], stderr ) ;

    exit( 1 ) ;
}
/*
 * PROCESS
 *
 * Copies fp_in to fp_out from the current position to end of file,
 * replacing every input byte b with table[ b ].  The output is the
 * same length as the input.
 *
 *   fp_in   stream to read (main() opens it in binary mode)
 *   fp_out  stream to write (main() opens it in binary mode)
 *   table   256-entry translation table indexed by input byte value
 *
 * A read or write failure prints a FATAL message on stderr and ends
 * the program with exit status 1.
 */
void
process( fp_in, fp_out, table )
FILE *fp_in, *fp_out ;
unsigned char table[256] ;
{
    unsigned char buf[ MAX_BYTES ] ;
    size_t len, i ;

    while(( len = fread( buf, sizeof( char ), MAX_BYTES, fp_in )) > 0 )
    {
        /* Translate in place, then write the whole buffer in one call. */
        for( i = 0 ; i < len ; i++ )
            buf[ i ] = table[ buf[ i ] ] ;

        if( fwrite( buf, sizeof( char ), len, fp_out ) != len )
        {
            fprintf( stderr, "FATAL... Unable to write.\n\n" ) ;
            exit( 1 ) ;
        }
    }

    /* fread() returns 0 for both end of file and a read error; only
       ferror() tells them apart.  Without this check a failing read
       would leave a silently truncated output file. */
    if( ferror( fp_in ))
    {
        fprintf( stderr, "FATAL... Unable to read.\n\n" ) ;
        exit( 1 ) ;
    }
    return ;
}
/*
 * TABLE_CHAR - Decode one character token of a table line.
 *
 * A token is either a single literal character or a backslash followed
 * by two hex digits.  Decoding starts at line[ *pos ] and *pos is moved
 * past the token.  If the line ends where a token (or the rest of an
 * escape) is expected, *foul_up is set to TRUE, *pos is left alone and
 * 0 is returned, so the string terminator is never stepped over.
 */
static unsigned char
table_char( line, pos, foul_up )
unsigned char line[] ;
int *pos ;
Bool *foul_up ;
{
    unsigned char value ;

    if( line[ *pos ] == '\0' )          /* nothing left on the line */
    {
        *foul_up = TRUE ;
        return( 0 ) ;
    }
    if( line[ *pos ] != '\\' )          /* plain literal character */
    {
        value = line[ *pos ] ;
        *pos += 1 ;
        return( value ) ;
    }
    /* get_hex() reads two characters; make sure both exist. */
    if( line[ *pos + 1 ] == '\0' || line[ *pos + 2 ] == '\0' )
    {
        *foul_up = TRUE ;
        return( 0 ) ;
    }
    value = get_hex( &line[ *pos + 1 ], foul_up ) ;
    *pos += 3 ;
    return( value ) ;
}

/*
 * LOAD_TABLE
 *
 * Resets table to the identity mapping, then applies the replacement
 * lines read from fp.  Each line is
 *
 *      from[-to]  <white space>  replacement
 *
 * where every character is a literal or a \hh hex escape.  All byte
 * values from "from" through "to" are mapped to the replacement; a
 * range whose end is below its start changes nothing.  Lines holding
 * only white space are skipped.
 *
 * A line longer than 110 characters, a line missing a column, or a
 * bad escape is reported on stderr, after which Usage_() ends the
 * program.
 */
void
load_table( fp, table )
FILE *fp ;
unsigned char table[256] ;
{
    unsigned char buf[ 120 ],
        replace,                /* replacement value */
        from, to ;
    Bool foul_up ;
    int len, pt, i ;

    for( i = 0 ; i < 256 ; i++ )
        table[ i ] = ( unsigned char ) i ;

    while( fgets( ( char * ) buf, sizeof( buf ), fp ) != NULL )
    {
        len = ( int ) strlen( ( char * ) buf ) ;
        /* Reject over-long lines before parsing anything from them. */
        foul_up = ( len > 110 ) ? TRUE : FALSE ;
        from = to = replace = 0 ;

        pt = 0 ;
        while( isspace( buf[ pt ] ))
            pt++ ;
        if( !foul_up && buf[ pt ] == '\0' )
            continue ;                  /* blank line */

        if( !foul_up )
        {
            /* Get first column, which may be a range */
            from = to = table_char( buf, &pt, &foul_up ) ;
            if( !foul_up && buf[ pt ] == '-' )
            {
                pt++ ;
                to = table_char( buf, &pt, &foul_up ) ;
            }
        }
        if( !foul_up )
        {
            /* Get the second column = replacement character */
            while( isspace( buf[ pt ] ))
                pt++ ;
            replace = table_char( buf, &pt, &foul_up ) ;
        }

        if( foul_up )
        {
            fprintf( stderr, "Bad line in table...\n\t%s\n", ( char * ) buf ) ;
            Usage_() ;                  /* exits; does not return */
        }

        for( i = from ; i <= to ; i++ )
            table[ i ] = replace ;
    }
    return ;
}
/*
 * GET_HEX - Get the value of two hex digits
 *
 *   buf      points at the first of two hex digit characters
 *            (0-9, A-F or a-f); the first is the high nibble
 *   foul_up  set to TRUE if either character is not a hex digit;
 *            never cleared here
 *
 * Returns the byte value of the two digits, or 0 when a digit is bad.
 * Decoding stops at the first bad character, so if buf[0] is the
 * string terminator buf[1] is not read.
 */
unsigned char
get_hex( buf, foul_up )
unsigned char buf[] ;
Bool *foul_up ;
{
    int i ;
    int nib ;           /* int, so "invalid" cannot alias a byte value */
    int value ;
    unsigned char c ;

    value = 0 ;
    for( i = 0 ; i < 2 ; i++ )
    {
        c = buf[ i ] ;
        if( c >= '0' && c <= '9' )
            nib = c - '0' ;
        else if( c >= 'A' && c <= 'F' )
            nib = 10 + ( c - 'A' ) ;
        else if( c >= 'a' && c <= 'f' )
            nib = 10 + ( c - 'a' ) ;
        else
        {
            *foul_up = TRUE ;
            return( 0 ) ;
        }
        value = ( value << 4 ) | nib ;
    }
    return( ( unsigned char ) value ) ;
}